# Colab 第一個儲存格:先執行以下程式碼上傳檔案
# Upload a file through the Colab file picker and save its text as line.txt.
# `files` is imported here because this cell uses it before any other import
# of google.colab appears in the notebook.
from google.colab import files

# The code below treats the result as a mapping of file name -> raw bytes.
uploaded_files = files.upload()
if not uploaded_files:
    # Without this guard, taking the first key would fail with a bare
    # IndexError/StopIteration that does not explain what went wrong.
    raise RuntimeError("沒有上傳任何檔案,請重新執行此儲存格。")

# Only the first uploaded file is used.
uploaded_file_name = next(iter(uploaded_files))

# Decode the uploaded bytes as UTF-8 text.
content = uploaded_files[uploaded_file_name].decode('utf-8')

# Persist the text as line.txt so the analysis cell can read it back.
with open('line.txt', 'w', encoding='utf-8') as out_file:
    out_file.write(content)

# Show the uploaded file name (for testing); the content itself is not printed.
print(f"檔案名稱:{uploaded_file_name}")
=====
# Settings: the two names are compared against the tokenised words further
# down (excluded from the keyword ranking and counted for the message-share
# pie chart).
YOUR_NAME = "XXX"
HER_NAME = "SSSS"
!pip install jieba
!pip install cutecharts
import re
import jieba
from datetime import datetime
from cutecharts.charts import Bar, Pie
from cutecharts.components import Page
# Tokenise line.txt with jieba and build a word -> occurrence-count table,
# then sort the (word, count) pairs from most to least frequent.

# Read inside a `with` block so the file handle is closed right after reading.
with open('line.txt', 'r', encoding='utf-8') as chat_file:
    content = chat_file.read()
words = jieba.lcut(content)

# Count tokens, skipping single-character tokens and purely numeric ones.
counts = {}
for word in words:
    if len(word) <= 1 or word.isdigit():
        continue
    counts[word] = counts.get(word, 0) + 1

# Space-joined token stream. Not read by the statements visible in this
# section; kept in case later code relies on it.
text = ' '.join(words)

# Drop tokens that should never be reported. pop() with a default replaces the
# former bare `except:` so that only a missing key is tolerated.
excludes = {'\r\n', '下午', '上午', '...'}
for exword in excludes:
    counts.pop(exword, None)

# sorted() is stable, so words with equal counts keep their first-seen order.
items = sorted(counts.items(), key=lambda x: x[1], reverse=True)
# Select the most frequent words for the keyword bar chart, skipping the media
# placeholders and the two participant names.
# NOTE(review): the original condition `len(top_words) <= 10` collects 11
# words; that count is preserved here - confirm whether 10 was intended.
max_top_words = 11
skipped_words = {"通話", "照片", "影片", "貼圖", YOUR_NAME, HER_NAME}

top_words = []
top_counts = []
# Iterating over `items` directly (instead of indexing with a counter) stops
# cleanly when fewer eligible words exist than requested, where the indexed
# loop raised IndexError.
for word, count in items:
    if word in skipped_words:
        continue
    top_words.append(word)
    top_counts.append(count)
    if len(top_words) >= max_top_words:
        break
# Chart 1: bar chart of the selected keywords and their occurrence counts.
chart = Bar("關鍵字圖表")
chart.set_options(labels=top_words, x_label="單詞", y_label="出現次數")
chart.add_series("次數", top_counts)

# Chart 2: pie chart of how often the photo / video / call tokens occur.
media_labels = ['照片', '影片', '通話']
media_totals = [counts.get(label, 0) for label in media_labels]
chart2 = Pie("通話/影片/照片數統計")
chart2.set_options(labels=media_labels)
chart2.add_series(media_totals)

# Chart 3: pie chart of how often each participant's name token occurs.
speaker_labels = [YOUR_NAME, HER_NAME]
speaker_totals = [counts.get(speaker, 0) for speaker in speaker_labels]
chart3 = Pie("傳送訊息量")
chart3.set_options(labels=speaker_labels, inner_radius=0)
chart3.add_series(speaker_totals)
# Count how many matching header lines fall on each weekday.
#
# The pattern matches a line made of exactly 10 arbitrary characters followed
# by a word in literal parentheses, and captures that word. The parentheses
# are escaped here: unescaped, `((\w+))` is just a nested group, so a header
# such as "2023/05/01(一)" could never match.
# NOTE(review): assumes header lines look like `YYYY/MM/DD(weekday)` - confirm
# against the actual export format.
# `$` under (?m) also accepts a final line that has no trailing newline.
pattern = r"(?m)^.{10}\((\w+)\)$"

# English weekday names are normalised to the Chinese single-character form;
# any other captured value (including the Chinese forms) is kept unchanged.
# This fixes "Monday" being labelled "二" and "Tuesday" having no mapping.
weekday_labels = {
    "Monday": "一",
    "Tuesday": "二",
    "Wednesday": "三",
    "Thursday": "四",
    "Friday": "五",
    "Saturday": "六",
    "Sunday": "日",
}

weekdays_counts = {}
# Read with an explicit encoding, consistent with how line.txt is written and
# read elsewhere in this file.
with open("line.txt", "r", encoding="utf-8") as f:
    content = f.read()

matches = re.finditer(pattern, content)
for match in matches:
    weekday = match.group(1)
    weekday = weekday_labels.get(weekday, weekday)
    weekdays_counts[weekday] = weekdays_counts.get(weekday, 0) + 1

# Print the per-weekday totals.
for weekday, count in weekdays_counts.items():
    print(f"{weekday}: {count}")
# Chart 4: bar chart of the per-weekday totals, in the order the weekdays
# were first recorded in weekdays_counts.
weekday_names = list(weekdays_counts)
weekday_totals = [weekdays_counts[day] for day in weekday_names]
chart4 = Bar("星期資料統計")
chart4.set_options(labels=weekday_names, x_label="星期", y_label="次數")
chart4.add_series("次數", weekday_totals)
# Collect the four charts on one page, render it to an HTML file, and hand
# that file to the browser through Colab's download helper.
page = Page()
for figure in (chart, chart2, chart3, chart4):
    page.add(figure)

html_file_path = "charts.html"
page.render(html_file_path)

from google.colab import files
files.download(html_file_path)